{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# SFT 训练数据合成"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "from constant import SYSTEM_PROMPT, USER_PROMPT_TPL, parse_user_prompt"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. 合成符合格式的正确答案"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{\n",
      "    \"messages\": [\n",
      "        {\n",
      "            \"role\": \"system\",\n",
      "            \"content\": \"You are a helpful assistant. You first thinks about the reasoning process in the mind and then provides the user with the answer.\"\n",
      "        },\n",
      "        {\n",
      "            \"role\": \"user\",\n",
      "            \"content\": \"Using the numbers 29, 11, 27, 10, create an equation that equals 524. You can use basic arithmetic operations (+, -, *, /) one or multiple times but each number can only be used once, and you must use all the numbers. Show your work in <think> </think> tags. And return the final equation in <answer> </answer> tags, for example <answer>(1 + 2) / 3</answer>. Think step by step inside <think> tags.\"\n",
      "        },\n",
      "        {\n",
      "            \"role\": \"assistant\",\n",
      "            \"content\": \"<think>Step 1: 29 - 10 = 19\\nStep 2: 19 * 27 = 513\\nStep 3: 513 + 11 = 524\\nFinal answer: ((29 - 10) * 27) + 11</think>\\n\\n<answer>((29 - 10) * 27) + 11</answer>\"\n",
      "        }\n",
      "    ]\n",
      "}\n"
     ]
    }
   ],
   "source": [
    "# Build SFT chat samples from the step-annotated training set and save them as JSONL.\n",
    "INPUT_PATH = \"data/train_with_steps.jsonl\"\n",
    "OUTPUT_PATH = \"data/train_sft_simple.jsonl\"\n",
    "\n",
    "data = []\n",
    "# Open with an explicit UTF-8 encoding: the output below is dumped with\n",
    "# ensure_ascii=False, so the platform default encoding must not be relied on.\n",
    "with open(INPUT_PATH, \"r\", encoding=\"utf-8\") as f:\n",
    "    for line in f:\n",
    "        if not line.strip():\n",
    "            # Tolerate blank lines (e.g. an extra newline at the end of the file).\n",
    "            continue\n",
    "        d = json.loads(line)\n",
    "        # Convert \"solution_steps\", e.g.\n",
    "        # [[496, \"-\", 80, 416], [416, \"-\", 41, 375], [375, \"-\", 97, 278]],\n",
    "        # into step-by-step thinking text using a fixed template.\n",
    "        reasoning_steps = [\n",
    "            f\"Step {i}: {step[0]} {step[1]} {step[2]} = {step[3]}\"\n",
    "            for i, step in enumerate(d[\"solution_steps\"], start=1)\n",
    "        ]\n",
    "        reasoning_steps.append(f\"Final answer: {d['ground_truth_solution']}\")\n",
    "        reasoning = \"\\n\".join(reasoning_steps)\n",
    "        messages = [\n",
    "            {\"role\": \"system\", \"content\": SYSTEM_PROMPT},\n",
    "            {\n",
    "                \"role\": \"user\",\n",
    "                \"content\": parse_user_prompt(USER_PROMPT_TPL, d[\"numbers\"], d[\"target\"]),\n",
    "            },\n",
    "            {\n",
    "                # The target completion: reasoning inside <think>, final equation inside <answer>.\n",
    "                \"role\": \"assistant\",\n",
    "                \"content\": f\"<think>{reasoning}</think>\\n\\n<answer>{d['ground_truth_solution']}</answer>\",\n",
    "            },\n",
    "        ]\n",
    "        data.append({\"messages\": messages})\n",
    "\n",
    "# Fail with a clear message (rather than an IndexError on data[0]) when nothing was loaded.\n",
    "assert data, f\"no samples loaded from {INPUT_PATH}\"\n",
    "\n",
    "# Preview the first sample to eyeball the format.\n",
    "print(json.dumps(data[0], indent=4))\n",
    "\n",
    "# One JSON object per line; a distinct loop name avoids shadowing the record `d` above.\n",
    "with open(OUTPUT_PATH, \"w\", encoding=\"utf-8\") as f:\n",
    "    for sample in data:\n",
    "        f.write(json.dumps(sample, ensure_ascii=False) + \"\\n\")\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
